Making Prediction with fbprophet (Facebook Prophet) Library

  • Performing time-series analysis and crime occurrence prediction with the Facebook Prophet library
In [1]:
#%matplotlib notebook

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark import SparkContext
import matplotlib.pyplot as plt
import numpy as np
In [2]:
import fbprophet
In [3]:
# Reuse the SparkContext already running in this kernel if there is one, otherwise
# start one, so the notebook does not depend on `sc` being pre-defined by the launcher.
sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)

Loading the dataset

In [4]:
dataPath ='../ts-test/Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv'

# Explicit schema for the incident report CSV, listed in file column order.
# Every field is declared nullable; "Date" and "Time" are read as plain strings here.
crimeDataSchema = (StructType()
                   .add("IncidntNum", LongType(), True)
                   .add("Category", StringType(), True)
                   .add("Descript", StringType(), True)
                   .add("DayOfWeek", StringType(), True)
                   .add("Date", StringType(), True)
                   .add("Time", StringType(), True)
                   .add("PdDistrict", StringType(), True)
                   .add("Resolution", StringType(), True)
                   .add("Address", StringType(), True)
                   .add("X", DoubleType(), True)
                   .add("Y", DoubleType(), True)
                   .add("Location", StringType(), True)
                   .add("PdId", LongType(), True))

# Comma-delimited CSV whose first line is a header row.
crimeDF = (
    sqlContext.read
    .format('csv')
    .option('delimiter', ',')
    .option('header', 'true')
    .load(dataPath, schema=crimeDataSchema)
)
In [5]:
from pyspark.sql.functions import udf, unix_timestamp, to_timestamp
import datetime


def parseDate(dateStr):
    """Return a timestamp Column parsed from an 'MM/dd/yyyy' date string column.

    This builds a Spark Column expression (via unix_timestamp and a cast); it is
    meant to be used directly inside withColumn/select, not wrapped in a Python UDF.
    """
    epochSeconds = unix_timestamp(dateStr, 'MM/dd/yyyy')
    return epochSeconds.cast('timestamp')

# Replace the string "Date" column with a timestamp column of the same name:
# add the parsed value under a temporary name, drop the original, then rename.
crimeDF_date = (
    crimeDF
    .withColumn("Date_tmp", parseDate(crimeDF.Date))
    .drop("Date")
    .withColumnRenamed("Date_tmp", "Date")
)
In [6]:
# Number of incidents per (day, category), sorted chronologically.
crime_date_count = (
    crimeDF_date
    .groupBy("Date", "Category")
    .count()
    .select("Date", "Category", "Count")
    .sort("Date")
)
In [7]:
def intCount(c):
    """Convert a count value to a Python float.

    Despite its name, this returns a float: the notebook wraps it in a UDF whose
    declared return type is DoubleType.

    Args:
        c: a number (or anything float() accepts), or None.

    Returns:
        float(c), or None when c is None so that a null cell stays null instead
        of raising TypeError inside the UDF.
    """
    if c is None:
        return None
    return float(c)

# Convert "Count" from an integer count to DoubleType (the numeric type used for
# Prophet's `y` later on). A native column cast does this inside Spark; a Python
# UDF would ship every row to a Python worker just to call float().
# The result is cached because it is filtered repeatedly in the cells below.
crime_date_count = (crime_date_count.withColumn("Count_t", crime_date_count["Count"].cast(DoubleType()))
                                   .drop("Count")
                                   .withColumnRenamed("Count_t", "Count")
                                   .cache())

The dataframe was reconstructed to have "Date" in timestamp format and "Count" in Double type

In [8]:
# Preview the per-day, per-category counts after the type conversion.
crime_date_count.show()
+-------------------+--------------------+-----+
|               Date|            Category|Count|
+-------------------+--------------------+-----+
|2003-01-01 00:00:00|             ROBBERY| 10.0|
|2003-01-01 00:00:00|SEX OFFENSES, NON...|  1.0|
|2003-01-01 00:00:00|             ASSAULT| 96.0|
|2003-01-01 00:00:00|               FRAUD| 23.0|
|2003-01-01 00:00:00|FORGERY/COUNTERFE...| 16.0|
|2003-01-01 00:00:00|        PROSTITUTION|  6.0|
|2003-01-01 00:00:00|SEX OFFENSES, FOR...| 13.0|
|2003-01-01 00:00:00|        NON-CRIMINAL| 65.0|
|2003-01-01 00:00:00|           EXTORTION|  1.0|
|2003-01-01 00:00:00|           VANDALISM| 28.0|
|2003-01-01 00:00:00|               ARSON|  1.0|
|2003-01-01 00:00:00|             RUNAWAY|  1.0|
|2003-01-01 00:00:00|       LARCENY/THEFT| 72.0|
|2003-01-01 00:00:00|  DISORDERLY CONDUCT|  3.0|
|2003-01-01 00:00:00|      OTHER OFFENSES| 91.0|
|2003-01-01 00:00:00|           LOITERING|  1.0|
|2003-01-01 00:00:00|      SUSPICIOUS OCC| 35.0|
|2003-01-01 00:00:00|          KIDNAPPING|  3.0|
|2003-01-01 00:00:00|        EMBEZZLEMENT|  7.0|
|2003-01-01 00:00:00|            TRESPASS|  4.0|
+-------------------+--------------------+-----+
only showing top 20 rows

In [9]:
# Distinct crime categories. distinct() returns rows in no particular order, so the
# names are sorted to keep plot/legend order stable between runs. Null categories are
# skipped: an equality filter on null matches no rows, which would leave the
# per-category cells with nothing to plot or fit.
list_of_cats = sorted(
    r.Category
    for r in crime_date_count.select("Category").distinct().collect()
    if r.Category is not None
)
In [10]:
# One line per crime category: daily incident count over the whole dataset.
plt.figure(figsize=[15,10])

plt.title('Occurence of Crimes by Category')

for cat in list_of_cats:
    data = crime_date_count.where(crime_date_count["Category"] == cat).select("Date", "Count")
    # Fetch dates and counts together in a single, explicitly ordered collect so the
    # two lists are guaranteed to line up and only one Spark job runs per category.
    rows_c = data.orderBy("Date").collect()
    dates_c = [r.Date for r in rows_c]
    counts_c = [r.Count for r in rows_c]
    plt.plot(dates_c, counts_c, label=cat)

# Put the legend outside the axes; there are too many categories to fit inside.
plt.legend(loc='upper left',bbox_to_anchor=(1.04,1))
plt.show()
In [11]:
# Pull the "Date" value of every (day, category) row to the driver as a plain list.
dates = [row.Date for row in crime_date_count.select("Date").collect()]
In [12]:
# Earliest and latest date in the data. Computed with min()/max() rather than by
# taking the first/last element, so the result does not depend on the collected
# rows still being sorted, and a null date (unparseable input) cannot become minDate.
_known_dates = [d for d in dates if d is not None]
minDate = min(_known_dates)
maxDate = max(_known_dates)

Divide the data into two sets: one from the earliest date to 2017/5/15, and one from 2017/5/16 to the latest date in the dataset

  • The first set is used to fit the model
  • The second set is used to check the prediction result
In [13]:
# Training window: earliest date through 2017-05-15.
crime_date_count_sample = (
    crime_date_count
    .filter(crime_date_count["Date"].between(minDate, datetime.datetime(2017, 5, 15, 0, 0)))
    .cache()
)

# Evaluation window: 2017-05-16 through the latest date.
crime_date_count_sample_future = (
    crime_date_count
    .filter(crime_date_count["Date"].between(datetime.datetime(2017, 5, 16, 0, 0), maxDate))
    .cache()
)
In [14]:
# Preview the training portion of the per-category counts.
crime_date_count_sample.show()
+-------------------+--------------------+-----+
|               Date|            Category|Count|
+-------------------+--------------------+-----+
|2003-01-01 00:00:00|             ROBBERY| 10.0|
|2003-01-01 00:00:00|SEX OFFENSES, NON...|  1.0|
|2003-01-01 00:00:00|             ASSAULT| 96.0|
|2003-01-01 00:00:00|               FRAUD| 23.0|
|2003-01-01 00:00:00|FORGERY/COUNTERFE...| 16.0|
|2003-01-01 00:00:00|        PROSTITUTION|  6.0|
|2003-01-01 00:00:00|SEX OFFENSES, FOR...| 13.0|
|2003-01-01 00:00:00|        NON-CRIMINAL| 65.0|
|2003-01-01 00:00:00|           EXTORTION|  1.0|
|2003-01-01 00:00:00|           VANDALISM| 28.0|
|2003-01-01 00:00:00|               ARSON|  1.0|
|2003-01-01 00:00:00|             RUNAWAY|  1.0|
|2003-01-01 00:00:00|       LARCENY/THEFT| 72.0|
|2003-01-01 00:00:00|  DISORDERLY CONDUCT|  3.0|
|2003-01-01 00:00:00|      OTHER OFFENSES| 91.0|
|2003-01-01 00:00:00|           LOITERING|  1.0|
|2003-01-01 00:00:00|      SUSPICIOUS OCC| 35.0|
|2003-01-01 00:00:00|          KIDNAPPING|  3.0|
|2003-01-01 00:00:00|        EMBEZZLEMENT|  7.0|
|2003-01-01 00:00:00|            TRESPASS|  4.0|
+-------------------+--------------------+-----+
only showing top 20 rows

In [17]:
import logging

# Raise the root logger's threshold to CRITICAL so that lower-severity library
# messages do not show up in the notebook output next to the plots.
logger = logging.getLogger(name=None)
logger.setLevel("CRITICAL")

Prediction by Category

  • Plot the predicted values and the actual value from the dataset for 1-year period predicted (2017/5/16 ~ 2018/5/15)
In [22]:
# Last day of the training window; forecast rows after it form the evaluation horizon.
train_end = datetime.datetime(2017, 5, 15, 0, 0)

for cat in list_of_cats:

    # Training series for this category in the (ds, y) layout Prophet expects.
    data = (crime_date_count_sample
            .where(crime_date_count_sample.Category == cat)
            .withColumnRenamed('Date', 'ds')
            .withColumnRenamed('Count', 'y')
            .select('ds', 'y')
            .orderBy('ds').toPandas())

    # Prophet cannot be fitted on fewer than two observations; skip such a category
    # instead of letting one of them abort the whole loop.
    if len(data) < 2:
        print("Skipping %s: only %d training row(s)" % (cat, len(data)))
        continue

    model = fbprophet.Prophet()
    model.fit(data)

    # NOTE(review): make_future_dataframe extends from the last date present in `data`,
    # so for a category whose last training observation lies well before train_end the
    # 365 generated days may not cover the whole evaluation window - confirm this is acceptable.
    future_df = model.make_future_dataframe(periods=365)
    forecast_df = model.predict(future_df)

    # Keep only the forecast rows that fall after the training window.
    future_section = forecast_df.loc[(forecast_df['ds'] > train_end)]

    # Held-out observations for this category, fetched in one ordered Spark job so
    # dates and counts are guaranteed to line up.
    actual = (crime_date_count_sample_future
              .where(crime_date_count_sample_future.Category == cat)
              .select("Date", "Count")
              .orderBy("Date")
              .toPandas())

    plt.figure(figsize=[15,10])
    plt.title(cat)

    plt.plot(actual['Date'], actual['Count'], label="Real Data")

    plt.plot(future_section['ds'], future_section['yhat_upper'], label="Predicted Upper")
    plt.plot(future_section['ds'], future_section['yhat'], label="Predicted")
    plt.plot(future_section['ds'], future_section['yhat_lower'], label="Predicted Lower")

    plt.legend()
    plt.show()

Prediction on Crime Counts for All Crime Categories

  • The counts for the different crime categories are aggregated, and the prediction is made on the total number of crimes occurring per day
In [38]:
# Total number of incidents per day across all categories, sorted chronologically.
allCat_count = (
    crimeDF_date
    .groupBy("Date")
    .count()
    .select("Date", "Count")
    .sort("Date")
)
In [39]:
# Convert the daily total to DoubleType and rename the columns to Prophet's (ds, y)
# convention. A native column cast is used instead of a Python UDF: the conversion
# then runs inside Spark without round-tripping every row through a Python worker.
allCat_count = (allCat_count.withColumn("Count_t", allCat_count["Count"].cast(DoubleType()))
                                   .drop("Count")
                                   .withColumnRenamed("Date", "ds")
                                   .withColumnRenamed("Count_t", "y")
                                   .cache())
In [40]:
# Preview the aggregated daily totals in (ds, y) form.
allCat_count.show()
+-------------------+-----+
|                 ds|    y|
+-------------------+-----+
|2003-01-01 00:00:00|622.0|
|2003-01-02 00:00:00|411.0|
|2003-01-03 00:00:00|440.0|
|2003-01-04 00:00:00|347.0|
|2003-01-05 00:00:00|377.0|
|2003-01-06 00:00:00|400.0|
|2003-01-07 00:00:00|418.0|
|2003-01-08 00:00:00|528.0|
|2003-01-09 00:00:00|417.0|
|2003-01-10 00:00:00|399.0|
|2003-01-11 00:00:00|363.0|
|2003-01-12 00:00:00|324.0|
|2003-01-13 00:00:00|398.0|
|2003-01-14 00:00:00|406.0|
|2003-01-15 00:00:00|410.0|
|2003-01-16 00:00:00|406.0|
|2003-01-17 00:00:00|444.0|
|2003-01-18 00:00:00|352.0|
|2003-01-19 00:00:00|305.0|
|2003-01-20 00:00:00|414.0|
+-------------------+-----+
only showing top 20 rows

Split the data into a training set used to fit the model and a held-out set used for testing

In [41]:
# Training window: earliest date through 2017-05-15.
allCat_count_sample = (
    allCat_count
    .filter(allCat_count["ds"].between(minDate, datetime.datetime(2017, 5, 15, 0, 0)))
    .cache()
)

# Evaluation window: 2017-05-16 through the latest date.
allCat_count_future = (
    allCat_count
    .filter(allCat_count["ds"].between(datetime.datetime(2017, 5, 16, 0, 0), maxDate))
    .cache()
)
In [42]:
# Bring the training rows to the driver as a pandas DataFrame (columns ds, y) for Prophet.
allCatCountPandas = allCat_count_sample.toPandas()
In [28]:
# Fit a Prophet model with default settings on the aggregated daily totals.
m = fbprophet.Prophet()
m.fit(allCatCountPandas)
Out[28]:
<fbprophet.forecaster.Prophet at 0x7fa2846a2090>
In [43]:
# Extend the training dates by 365 daily rows to forecast over.
future_df = m.make_future_dataframe(periods=365)
# Call form of print: valid on both Python 2 and Python 3 (the statement form is
# a syntax error on Python 3).
print(future_df.tail())
             ds
5608 2018-05-11
5609 2018-05-12
5610 2018-05-13
5611 2018-05-14
5612 2018-05-15
In [45]:
# Predict over the historical dates plus the 365 added future days.
forecast_df = m.predict(future_df)
In [46]:
# Display the forecast table (yhat with its lower/upper bounds and the trend/seasonality components).
forecast_df
Out[46]:
ds trend trend_lower trend_upper yhat_lower yhat_upper additive_terms additive_terms_lower additive_terms_upper multiplicative_terms multiplicative_terms_lower multiplicative_terms_upper weekly weekly_lower weekly_upper yearly yearly_lower yearly_upper yhat
0 2003-01-01 417.705151 417.705151 417.705151 354.719001 464.054083 -8.142260 -8.142260 -8.142260 0.0 0.0 0.0 10.472430 10.472430 10.472430 -18.614689 -18.614689 -18.614689 409.562891
1 2003-01-02 417.674324 417.674324 417.674324 350.712414 447.791902 -17.240569 -17.240569 -17.240569 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 -16.661033 -16.661033 -16.661033 400.433755
2 2003-01-03 417.643498 417.643498 417.643498 376.912136 479.871923 11.734286 11.734286 11.734286 0.0 0.0 0.0 26.397566 26.397566 26.397566 -14.663281 -14.663281 -14.663281 429.377783
3 2003-01-04 417.612671 417.612671 417.612671 356.226195 459.372698 -8.366910 -8.366910 -8.366910 0.0 0.0 0.0 4.276357 4.276357 4.276357 -12.643267 -12.643267 -12.643267 409.245762
4 2003-01-05 417.581845 417.581845 417.581845 325.851519 432.202156 -38.499579 -38.499579 -38.499579 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 -10.623096 -10.623096 -10.623096 379.082266
5 2003-01-06 417.551019 417.551019 417.551019 348.392532 453.264190 -20.689700 -20.689700 -20.689700 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 -8.624752 -8.624752 -8.624752 396.861318
6 2003-01-07 417.520192 417.520192 417.520192 355.951018 459.792883 -7.295088 -7.295088 -7.295088 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 -6.669703 -6.669703 -6.669703 410.225104
7 2003-01-08 417.489366 417.489366 417.489366 369.902710 473.166090 5.693902 5.693902 5.693902 0.0 0.0 0.0 10.472430 10.472430 10.472430 -4.778528 -4.778528 -4.778528 423.183267
8 2003-01-09 417.458539 417.458539 417.458539 363.387034 467.748243 -3.550093 -3.550093 -3.550093 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 -2.970557 -2.970557 -2.970557 413.908447
9 2003-01-10 417.427713 417.427713 417.427713 391.412638 489.646021 25.134028 25.134028 25.134028 0.0 0.0 0.0 26.397566 26.397566 26.397566 -1.263539 -1.263539 -1.263539 442.561741
10 2003-01-11 417.396887 417.396887 417.396887 372.724948 472.878224 4.603005 4.603005 4.603005 0.0 0.0 0.0 4.276357 4.276357 4.276357 0.326648 0.326648 0.326648 421.999892
11 2003-01-12 417.366060 417.366060 417.366060 336.054826 443.189317 -26.090229 -26.090229 -26.090229 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 1.786254 1.786254 1.786254 391.275831
12 2003-01-13 417.335234 417.335234 417.335234 355.437321 459.469326 -8.961087 -8.961087 -8.961087 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 3.103861 3.103861 3.103861 408.374147
13 2003-01-14 417.304408 417.304408 417.304408 366.146605 468.874288 3.645162 3.645162 3.645162 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 4.270547 4.270547 4.270547 420.949570
14 2003-01-15 417.273581 417.273581 417.273581 381.650275 483.667581 15.752414 15.752414 15.752414 0.0 0.0 0.0 10.472430 10.472430 10.472430 5.279985 5.279985 5.279985 433.025996
15 2003-01-16 417.242755 417.242755 417.242755 372.891363 477.280311 5.548960 5.548960 5.548960 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 6.128496 6.128496 6.128496 422.791714
16 2003-01-17 417.211928 417.211928 417.211928 399.268983 503.393646 33.212616 33.212616 33.212616 0.0 0.0 0.0 26.397566 26.397566 26.397566 6.815050 6.815050 6.815050 450.424545
17 2003-01-18 417.181102 417.181102 417.181102 378.913314 484.737134 11.617566 11.617566 11.617566 0.0 0.0 0.0 4.276357 4.276357 4.276357 7.341209 7.341209 7.341209 428.798668
18 2003-01-19 417.150276 417.150276 417.150276 347.268697 449.291981 -20.165462 -20.165462 -20.165462 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 7.711021 7.711021 7.711021 396.984813
19 2003-01-20 417.119449 417.119449 417.119449 364.276296 463.093520 -4.134079 -4.134079 -4.134079 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 7.930870 7.930870 7.930870 412.985371
20 2003-01-21 417.088623 417.088623 417.088623 372.989169 476.407019 7.383898 7.383898 7.383898 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 8.009283 8.009283 8.009283 424.472520
21 2003-01-22 417.057796 417.057796 417.057796 385.467311 486.639780 18.429124 18.429124 18.429124 0.0 0.0 0.0 10.472430 10.472430 10.472430 7.956694 7.956694 7.956694 435.486920
22 2003-01-23 417.026970 417.026970 417.026970 366.963193 476.048755 7.205653 7.205653 7.205653 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 7.785189 7.785189 7.785189 424.232623
23 2003-01-24 416.996144 416.996144 416.996144 400.170168 502.293944 33.905780 33.905780 33.905780 0.0 0.0 0.0 26.397566 26.397566 26.397566 7.508214 7.508214 7.508214 450.901924
24 2003-01-25 416.965317 416.965317 416.965317 374.658494 478.192042 11.416630 11.416630 11.416630 0.0 0.0 0.0 4.276357 4.276357 4.276357 7.140273 7.140273 7.140273 428.381947
25 2003-01-26 416.934491 416.934491 416.934491 345.636561 450.142912 -21.179868 -21.179868 -21.179868 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 6.696615 6.696615 6.696615 395.754623
26 2003-01-27 416.903665 416.903665 416.903665 356.925855 458.468232 -5.872033 -5.872033 -5.872033 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 6.192916 6.192916 6.192916 411.031631
27 2003-01-28 416.872838 416.872838 416.872838 370.829100 473.658208 5.019579 5.019579 5.019579 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 5.644964 5.644964 5.644964 421.892417
28 2003-01-29 416.842012 416.842012 416.842012 383.966128 480.031702 15.540791 15.540791 15.540791 0.0 0.0 0.0 10.472430 10.472430 10.472430 5.068361 5.068361 5.068361 432.382803
29 2003-01-30 416.811185 416.811185 416.811185 367.678911 468.127040 3.898695 3.898695 3.898695 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 4.478231 4.478231 4.478231 420.709881
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5583 2018-04-16 422.723281 419.453341 427.345650 360.127568 464.912637 -10.078416 -10.078416 -10.078416 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.986533 1.986533 1.986533 412.644865
5584 2018-04-17 422.726670 419.442674 427.379156 369.684714 478.657130 1.239799 1.239799 1.239799 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.865185 1.865185 1.865185 423.966470
5585 2018-04-18 422.730060 419.432841 427.428486 384.182775 485.988364 12.215310 12.215310 12.215310 0.0 0.0 0.0 10.472430 10.472430 10.472430 1.742880 1.742880 1.742880 434.945370
5586 2018-04-19 422.733449 419.423325 427.451491 375.034538 479.271844 1.041308 1.041308 1.041308 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 1.620844 1.620844 1.620844 423.774757
5587 2018-04-20 422.736838 419.412884 427.474248 397.135897 503.530475 27.898109 27.898109 27.898109 0.0 0.0 0.0 26.397566 26.397566 26.397566 1.500543 1.500543 1.500543 450.634948
5588 2018-04-21 422.740228 419.401877 427.497005 375.681164 479.961831 5.660022 5.660022 5.660022 0.0 0.0 0.0 4.276357 4.276357 4.276357 1.383665 1.383665 1.383665 428.400250
5589 2018-04-22 422.743617 419.398293 427.519762 345.395938 447.618465 -26.604391 -26.604391 -26.604391 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 1.272092 1.272092 1.272092 396.139226
5590 2018-04-23 422.747007 419.383471 427.542519 364.468884 465.746336 -10.897091 -10.897091 -10.897091 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.167857 1.167857 1.167857 411.849915
5591 2018-04-24 422.750396 419.373040 427.565276 371.971306 477.327559 0.447713 0.447713 0.447713 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.073098 1.073098 1.073098 423.198109
5592 2018-04-25 422.753786 419.362609 427.588033 383.196045 486.579274 11.462427 11.462427 11.462427 0.0 0.0 0.0 10.472430 10.472430 10.472430 0.989997 0.989997 0.989997 434.216212
5593 2018-04-26 422.757175 419.352178 427.610790 369.966818 472.324406 0.341182 0.341182 0.341182 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 0.920718 0.920718 0.920718 423.098357
5594 2018-04-27 422.760565 419.341747 427.635677 398.361805 502.908006 27.264898 27.264898 27.264898 0.0 0.0 0.0 26.397566 26.397566 26.397566 0.867332 0.867332 0.867332 450.025463
5595 2018-04-28 422.763954 419.331316 427.660430 376.039267 480.383112 5.108099 5.108099 5.108099 0.0 0.0 0.0 4.276357 4.276357 4.276357 0.831742 0.831742 0.831742 427.872053
5596 2018-04-29 422.767344 419.320885 427.685184 346.040426 450.118944 -27.060885 -27.060885 -27.060885 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 0.815598 0.815598 0.815598 395.706458
5597 2018-04-30 422.770733 419.310303 427.717883 360.125723 463.282345 -11.244726 -11.244726 -11.244726 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 0.820222 0.820222 0.820222 411.526007
5598 2018-05-01 422.774122 419.297971 427.756228 367.837599 475.525970 0.221140 0.221140 0.221140 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 0.846525 0.846525 0.846525 422.995262
5599 2018-05-02 422.777512 419.285640 427.799188 383.767065 489.843976 11.367359 11.367359 11.367359 0.0 0.0 0.0 10.472430 10.472430 10.472430 0.894929 0.894929 0.894929 434.144871
5600 2018-05-03 422.780901 419.270398 427.864619 370.265709 474.621690 0.385767 0.385767 0.385767 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 0.965303 0.965303 0.965303 423.166668
5601 2018-05-04 422.784291 419.252238 427.909015 404.800253 501.673500 27.454464 27.454464 27.454464 0.0 0.0 0.0 26.397566 26.397566 26.397566 1.056898 1.056898 1.056898 450.238755
5602 2018-05-05 422.787680 419.234907 427.933022 375.979123 483.442417 5.444660 5.444660 5.444660 0.0 0.0 0.0 4.276357 4.276357 4.276357 1.168303 1.168303 1.168303 428.232340
5603 2018-05-06 422.791070 419.222291 427.953092 346.594733 445.881341 -26.579075 -26.579075 -26.579075 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 1.297408 1.297408 1.297408 396.211995
5604 2018-05-07 422.794459 419.209676 427.976364 358.664264 458.674083 -10.623557 -10.623557 -10.623557 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.441392 1.441392 1.441392 412.170902
5605 2018-05-08 422.797849 419.197061 428.007275 371.114898 474.708027 0.971331 0.971331 0.971331 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.596716 1.596716 1.596716 423.769180
5606 2018-05-09 422.801238 419.184446 428.033125 380.893270 485.258346 12.231582 12.231582 12.231582 0.0 0.0 0.0 10.472430 10.472430 10.472430 1.759152 1.759152 1.759152 435.032820
5607 2018-05-10 422.804628 419.171831 428.057960 368.318176 480.355078 1.344282 1.344282 1.344282 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 1.923818 1.923818 1.923818 424.148910
5608 2018-05-11 422.808017 419.159215 428.081978 398.525778 500.780445 28.482812 28.482812 28.482812 0.0 0.0 0.0 26.397566 26.397566 26.397566 2.085246 2.085246 2.085246 451.290829
5609 2018-05-12 422.811406 419.146600 428.105997 374.576567 478.752158 6.513817 6.513817 6.513817 0.0 0.0 0.0 4.276357 4.276357 4.276357 2.237460 2.237460 2.237460 429.325223
5610 2018-05-13 422.814796 419.135290 428.130015 347.095766 448.565036 -25.502402 -25.502402 -25.502402 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 2.374081 2.374081 2.374081 397.312394
5611 2018-05-14 422.818185 419.129051 428.154034 362.337739 465.450313 -9.576496 -9.576496 -9.576496 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 2.488453 2.488453 2.488453 413.241690
5612 2018-05-15 422.821575 419.125337 428.175633 373.103098 478.262266 1.948389 1.948389 1.948389 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 2.573774 2.573774 2.573774 424.769964

5613 rows × 19 columns

Extracting out the 1-year period prediction

In [47]:
# Keep only the forecast rows dated after the end of the training window (2017-05-15).
future_section = forecast_df[forecast_df['ds'] > datetime.datetime(2017, 5, 15, 0, 0)]
In [48]:
# Display the forecast rows that fall after the training window.
future_section
Out[48]:
ds trend trend_lower trend_upper yhat_lower yhat_upper additive_terms additive_terms_lower additive_terms_upper multiplicative_terms multiplicative_terms_lower multiplicative_terms_upper weekly weekly_lower weekly_upper yearly yearly_lower yearly_upper yhat
5248 2017-05-16 421.587814 421.587814 421.587814 375.091342 476.602902 2.003862 2.003862 2.003862 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 2.629247 2.629247 2.629247 423.591675
5249 2017-05-17 421.591203 421.591203 421.591203 383.309253 485.333770 13.097095 13.097095 13.097095 0.0 0.0 0.0 10.472430 10.472430 10.472430 2.624666 2.624666 2.624666 434.688298
5250 2017-05-18 421.594593 421.594593 421.594593 374.434651 475.044538 1.990332 1.990332 1.990332 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 2.569868 2.569868 2.569868 423.584925
5251 2017-05-19 421.597982 421.597982 421.597982 402.904953 500.517707 28.856827 28.856827 28.856827 0.0 0.0 0.0 26.397566 26.397566 26.397566 2.459261 2.459261 2.459261 450.454809
5252 2017-05-20 421.601372 421.601372 421.601372 374.572186 483.119853 6.564340 6.564340 6.564340 0.0 0.0 0.0 4.276357 4.276357 4.276357 2.287983 2.287983 2.287983 428.165711
5253 2017-05-21 421.604761 421.604761 421.604761 347.351788 449.780270 -25.824413 -25.824413 -25.824413 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 2.052070 2.052070 2.052070 395.780348
5254 2017-05-22 421.608150 421.608150 421.608150 357.912752 460.795090 -10.316337 -10.316337 -10.316337 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.748611 1.748611 1.748611 411.291813
5255 2017-05-23 421.611540 421.611540 421.611540 371.308202 472.061804 0.750494 0.750494 0.750494 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.375879 1.375879 1.375879 422.362034
5256 2017-05-24 421.614929 421.614929 421.614929 380.061252 484.860818 11.405877 11.405877 11.405877 0.0 0.0 0.0 10.472430 10.472430 10.472430 0.933447 0.933447 0.933447 433.020807
5257 2017-05-25 421.618319 421.618319 421.618319 372.741122 470.416989 -0.157253 -0.157253 -0.157253 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 0.422283 0.422283 0.422283 421.461065
5258 2017-05-26 421.621708 421.621708 421.621708 396.719097 497.303615 26.242370 26.242370 26.242370 0.0 0.0 0.0 26.397566 26.397566 26.397566 -0.155196 -0.155196 -0.155196 447.864078
5259 2017-05-27 421.625098 421.625098 421.625098 377.456894 476.315293 3.481269 3.481269 3.481269 0.0 0.0 0.0 4.276357 4.276357 4.276357 -0.795088 -0.795088 -0.795088 425.106367
5260 2017-05-28 421.628487 421.628487 421.628487 338.479152 445.064459 -29.368495 -29.368495 -29.368495 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 -1.492011 -1.492011 -1.492011 392.259993
5261 2017-05-29 421.631877 421.631877 421.631877 355.352662 460.954705 -14.304095 -14.304095 -14.304095 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 -2.239146 -2.239146 -2.239146 407.327782
5262 2017-05-30 421.635266 421.635266 421.635266 369.638259 466.097291 -3.653694 -3.653694 -3.653694 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 -3.028309 -3.028309 -3.028309 417.981572
5263 2017-05-31 421.638656 421.638656 421.638656 374.166933 480.423205 6.622368 6.622368 6.622368 0.0 0.0 0.0 10.472430 10.472430 10.472430 -3.850062 -3.850062 -3.850062 428.261024
5264 2017-06-01 421.642045 421.642045 421.642045 366.388010 469.676473 -5.273400 -5.273400 -5.273400 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 -4.693863 -4.693863 -4.693863 416.368645
5265 2017-06-02 421.645434 421.645434 421.645434 390.410905 494.402964 20.849321 20.849321 20.849321 0.0 0.0 0.0 26.397566 26.397566 26.397566 -5.548245 -5.548245 -5.548245 442.494756
5266 2017-06-03 421.648824 421.648824 421.648824 366.176831 473.974911 -2.124661 -2.124661 -2.124661 0.0 0.0 0.0 4.276357 4.276357 4.276357 -6.401018 -6.401018 -6.401018 419.524163
5267 2017-06-04 421.652213 421.652213 421.652213 340.322940 440.730013 -35.115995 -35.115995 -35.115995 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 -7.239512 -7.239512 -7.239512 386.536218
5268 2017-06-05 421.655603 421.655603 421.655603 347.672013 454.444373 -20.115774 -20.115774 -20.115774 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 -8.050825 -8.050825 -8.050825 401.539829
5269 2017-06-06 421.658992 421.658992 421.658992 361.397764 466.558312 -9.447483 -9.447483 -9.447483 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 -8.822098 -8.822098 -8.822098 412.211509
5270 2017-06-07 421.662382 421.662382 421.662382 366.871955 473.018857 0.931639 0.931639 0.931639 0.0 0.0 0.0 10.472430 10.472430 10.472430 -9.540791 -9.540791 -9.540791 422.594021
5271 2017-06-08 421.665771 421.665771 421.665771 355.364868 459.680976 -10.774501 -10.774501 -10.774501 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 -10.194965 -10.194965 -10.194965 410.891270
5272 2017-06-09 421.669161 421.669161 421.669161 383.711478 485.200270 15.624002 15.624002 15.624002 0.0 0.0 0.0 26.397566 26.397566 26.397566 -10.773564 -10.773564 -10.773564 437.293162
5273 2017-06-10 421.672550 421.672550 421.672550 363.700601 466.793244 -6.990321 -6.990321 -6.990321 0.0 0.0 0.0 4.276357 4.276357 4.276357 -11.266678 -11.266678 -11.266678 414.682229
5274 2017-06-11 421.675940 421.675940 421.675940 330.586554 433.602257 -39.542273 -39.542273 -39.542273 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 -11.665790 -11.665790 -11.665790 382.133667
5275 2017-06-12 421.679329 421.679329 421.679329 345.536966 445.755536 -24.028953 -24.028953 -24.028953 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 -11.964004 -11.964004 -11.964004 397.650376
5276 2017-06-13 421.682718 421.682718 421.682718 353.769424 459.755378 -12.781620 -12.781620 -12.781620 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 -12.156235 -12.156235 -12.156235 408.901098
5277 2017-06-14 421.686108 421.686108 421.686108 369.823404 474.439660 -1.766937 -1.766937 -1.766937 0.0 0.0 0.0 10.472430 10.472430 10.472430 -12.239367 -12.239367 -12.239367 419.919171
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5583 2018-04-16 422.723281 419.453341 427.345650 360.127568 464.912637 -10.078416 -10.078416 -10.078416 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.986533 1.986533 1.986533 412.644865
5584 2018-04-17 422.726670 419.442674 427.379156 369.684714 478.657130 1.239799 1.239799 1.239799 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.865185 1.865185 1.865185 423.966470
5585 2018-04-18 422.730060 419.432841 427.428486 384.182775 485.988364 12.215310 12.215310 12.215310 0.0 0.0 0.0 10.472430 10.472430 10.472430 1.742880 1.742880 1.742880 434.945370
5586 2018-04-19 422.733449 419.423325 427.451491 375.034538 479.271844 1.041308 1.041308 1.041308 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 1.620844 1.620844 1.620844 423.774757
5587 2018-04-20 422.736838 419.412884 427.474248 397.135897 503.530475 27.898109 27.898109 27.898109 0.0 0.0 0.0 26.397566 26.397566 26.397566 1.500543 1.500543 1.500543 450.634948
5588 2018-04-21 422.740228 419.401877 427.497005 375.681164 479.961831 5.660022 5.660022 5.660022 0.0 0.0 0.0 4.276357 4.276357 4.276357 1.383665 1.383665 1.383665 428.400250
5589 2018-04-22 422.743617 419.398293 427.519762 345.395938 447.618465 -26.604391 -26.604391 -26.604391 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 1.272092 1.272092 1.272092 396.139226
5590 2018-04-23 422.747007 419.383471 427.542519 364.468884 465.746336 -10.897091 -10.897091 -10.897091 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.167857 1.167857 1.167857 411.849915
5591 2018-04-24 422.750396 419.373040 427.565276 371.971306 477.327559 0.447713 0.447713 0.447713 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.073098 1.073098 1.073098 423.198109
5592 2018-04-25 422.753786 419.362609 427.588033 383.196045 486.579274 11.462427 11.462427 11.462427 0.0 0.0 0.0 10.472430 10.472430 10.472430 0.989997 0.989997 0.989997 434.216212
5593 2018-04-26 422.757175 419.352178 427.610790 369.966818 472.324406 0.341182 0.341182 0.341182 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 0.920718 0.920718 0.920718 423.098357
5594 2018-04-27 422.760565 419.341747 427.635677 398.361805 502.908006 27.264898 27.264898 27.264898 0.0 0.0 0.0 26.397566 26.397566 26.397566 0.867332 0.867332 0.867332 450.025463
5595 2018-04-28 422.763954 419.331316 427.660430 376.039267 480.383112 5.108099 5.108099 5.108099 0.0 0.0 0.0 4.276357 4.276357 4.276357 0.831742 0.831742 0.831742 427.872053
5596 2018-04-29 422.767344 419.320885 427.685184 346.040426 450.118944 -27.060885 -27.060885 -27.060885 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 0.815598 0.815598 0.815598 395.706458
5597 2018-04-30 422.770733 419.310303 427.717883 360.125723 463.282345 -11.244726 -11.244726 -11.244726 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 0.820222 0.820222 0.820222 411.526007
5598 2018-05-01 422.774122 419.297971 427.756228 367.837599 475.525970 0.221140 0.221140 0.221140 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 0.846525 0.846525 0.846525 422.995262
5599 2018-05-02 422.777512 419.285640 427.799188 383.767065 489.843976 11.367359 11.367359 11.367359 0.0 0.0 0.0 10.472430 10.472430 10.472430 0.894929 0.894929 0.894929 434.144871
5600 2018-05-03 422.780901 419.270398 427.864619 370.265709 474.621690 0.385767 0.385767 0.385767 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 0.965303 0.965303 0.965303 423.166668
5601 2018-05-04 422.784291 419.252238 427.909015 404.800253 501.673500 27.454464 27.454464 27.454464 0.0 0.0 0.0 26.397566 26.397566 26.397566 1.056898 1.056898 1.056898 450.238755
5602 2018-05-05 422.787680 419.234907 427.933022 375.979123 483.442417 5.444660 5.444660 5.444660 0.0 0.0 0.0 4.276357 4.276357 4.276357 1.168303 1.168303 1.168303 428.232340
5603 2018-05-06 422.791070 419.222291 427.953092 346.594733 445.881341 -26.579075 -26.579075 -26.579075 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 1.297408 1.297408 1.297408 396.211995
5604 2018-05-07 422.794459 419.209676 427.976364 358.664264 458.674083 -10.623557 -10.623557 -10.623557 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 1.441392 1.441392 1.441392 412.170902
5605 2018-05-08 422.797849 419.197061 428.007275 371.114898 474.708027 0.971331 0.971331 0.971331 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 1.596716 1.596716 1.596716 423.769180
5606 2018-05-09 422.801238 419.184446 428.033125 380.893270 485.258346 12.231582 12.231582 12.231582 0.0 0.0 0.0 10.472430 10.472430 10.472430 1.759152 1.759152 1.759152 435.032820
5607 2018-05-10 422.804628 419.171831 428.057960 368.318176 480.355078 1.344282 1.344282 1.344282 0.0 0.0 0.0 -0.579536 -0.579536 -0.579536 1.923818 1.923818 1.923818 424.148910
5608 2018-05-11 422.808017 419.159215 428.081978 398.525778 500.780445 28.482812 28.482812 28.482812 0.0 0.0 0.0 26.397566 26.397566 26.397566 2.085246 2.085246 2.085246 451.290829
5609 2018-05-12 422.811406 419.146600 428.105997 374.576567 478.752158 6.513817 6.513817 6.513817 0.0 0.0 0.0 4.276357 4.276357 4.276357 2.237460 2.237460 2.237460 429.325223
5610 2018-05-13 422.814796 419.135290 428.130015 347.095766 448.565036 -25.502402 -25.502402 -25.502402 0.0 0.0 0.0 -27.876483 -27.876483 -27.876483 2.374081 2.374081 2.374081 397.312394
5611 2018-05-14 422.818185 419.129051 428.154034 362.337739 465.450313 -9.576496 -9.576496 -9.576496 0.0 0.0 0.0 -12.064949 -12.064949 -12.064949 2.488453 2.488453 2.488453 413.241690
5612 2018-05-15 422.821575 419.125337 428.175633 373.103098 478.262266 1.948389 1.948389 1.948389 0.0 0.0 0.0 -0.625385 -0.625385 -0.625385 2.573774 2.573774 2.573774 424.769964

365 rows × 19 columns

Plot of the data and prediction for (2017/5/16~2018/5/15) following the data

In [50]:
# Prophet's built-in plot of the forecast produced by model `m`.
m.plot(forecast_df)
plt.show()

Extracting the prediction for the 1-year period between 2017/5/16 ~ 2018/5/15

In [54]:
# Compare the held-out daily totals with the forecast and its uncertainty bounds.
plt.figure(figsize=[15,10])
plt.title("Prediction on Total Crime Count for 2017/5/16 ~ 2018/5/15")

# Fetch dates and totals together in one explicitly ordered collect, so the x and y
# values are guaranteed to line up and only a single Spark job is run.
actual_rows = allCat_count_future.select("ds", "y").orderBy("ds").collect()
plt.plot([r.ds for r in actual_rows], [r.y for r in actual_rows], label="Real Data")

plt.plot(future_section['ds'], future_section['yhat_upper'], label="Predicted Upper")
plt.plot(future_section['ds'], future_section['yhat'], label="Prediction")
plt.plot(future_section['ds'], future_section['yhat_lower'], label="Predicted Lower")

plt.legend()
plt.show()
In [ ]: